* Start from an empty session: drop the data in memory, then any stored matrices.
clear
clear matrix
* NOTE(review): matsize is set twice. The first call also stores 11000 as the
* permanent default for future sessions; the later call sets 800 for the current
* session, so 800 is the value in effect for the rest of this script.
* Confirm which value is intended. maxvar is raised while no data are loaded.
set matsize 11000, permanently
set maxvar 32767 
set matsize 800

********************************************************************************

* create quinquennial data and quartiles according to individual productivity normalised by means by quartile/ssd/period - 

* Load the base file and attach the two auxiliary files, each matched 1:1 on
* id_prof and anno. The match report is printed before _merge is discarded.
use people
foreach extra in nisi hindex {
    sort id_prof anno
    merge 1:1 id_prof anno using `extra'
    tabulate _merge
    drop _merge
}
compress

* Numeric, value-labelled copy of the string variable ssd.
encode ssd, generate(ssdcode)
************timing per periods**********************************************************
* this ensures that the variables ateneo/ssd/regione/areacun/submacro are observed in the final year of the period

* Each *final variable copies its source and is then blanked out in every year
* other than 1994, 1998, 2002, 2006 and 2011.
local offyear "!inlist(anno, 1994, 1998, 2002, 2006, 2011)"

* University identifier: no value label is attached to this one.
generate id_unifinal = id_uni
replace id_unifinal = . if `offyear'

* Remaining variables: "source target" pairs; the target is given the value
* label that carries the source variable's name.
foreach pair in "ssdcode ssdcodefinal" "region regionefinal" "areacun areacunfinal" "submacro submacrofinal" {
    local src : word 1 of `pair'
    local dst : word 2 of `pair'
    generate `dst' = `src'
    label values `dst' `src'
    replace `dst' = . if `offyear'
}

********************************sample selection**************************************

* Discard records lacking a sector code or a sub-macro code, then area 99.
foreach required of varlist ssdcode submacro {
    drop if `required'==.
}
drop if areacun==99

* Rectangularize: add a placeholder row (_fillin==1) for every combination of
* ssdcode, period and fascia that is absent from the data.
fillin ssdcode period fascia
tabulate _fillin

******************************************************************************************************************************
* code to get new_prof for RESTAT - R1 
******************************************************************************************************************************
* Sum newass and neword within each ssdcodefinal/period cell and store the
* result in new_prof.dta; the working data are restored afterwards.
preserve
collapse (sum) newass neword, by(ssdcodefinal period)
* Use the full variable name. After the collapse no variable called "ssd"
* exists, so the former "drop if ssd==." only worked through variable-name
* abbreviation and would fail under "set varabbrev off" or if another
* ssd-prefixed variable were ever added to the collapse.
drop if ssdcodefinal==.
save new_prof, replace
restore

* Declare the panel so that L. refers to the same id_prof one anno earlier.
xtset id_prof anno

* switcher = 1 when a record for the previous year exists and carries a
* different ssdcode. The lag must be non-missing: on fillin placeholder rows
* both id_prof and L.id_prof are missing, so "id_prof==L.id_prof" alone would
* evaluate to true and count every placeholder as a switch in the export below.
gen switcher=(ssdcode!=L.ssdcode & id_prof==L.id_prof & !missing(L.id_prof))

* Export the number of switches per subject/fascia/period, then restore the data.
preserve
collapse (sum) switcher, by(subject fascia period)
export excel ddd_subject.xls, replace
restore
*
*********************************generate individual period productivity in terms of additional papers***********************
* computing the average productivity (measured by CUMULATED OUTPUT by fascia&ssd) in each of 5 periods
* Total of output1e per professor and period, constant within the cell.
egen soutput1e = total(output1e), by(id_prof period)
summarize soutput1e
replace soutput1e = 0 if soutput1e == .
label variable soutput1e "number of new papers weighted 2/(1+coauthors) in 5-year interval"
summarize soutput1e

* Mean of H per professor and period; cells with no H value are set to 0.
egen avH = mean(H), by(id_prof period)
summarize avH H
replace avH = 0 if avH == .
label variable avH "average H index in 5-year interval"
summarize avH

* Total of output1b per professor and period.
egen soutput1b = total(output1b), by(id_prof period)
summarize soutput1b
replace soutput1b = 0 if soutput1b == .
label variable soutput1b "number of new papers weighed by quality in 5-year interval"
summarize soutput1b

*********************************compute average productivity in relative terms*******************

* Add a small reproducible random perturbation (at most 0.00005 in absolute
* value) to each period total, then average it within professor/period so
* every row of a cell shares one value.
* NOTE(review): the perturbed variables take the default storage type; if that
* is float, the perturbation may fall below its resolution for large totals.
set seed 1
local n = 0
foreach m in e b {
    local ++n
    generate tmp`n' = soutput1`m' + (runiform() - 0.5)/10000
}
egen fsoutput1e = mean(tmp1), by(id_prof period)
egen fsoutput1b = mean(tmp2), by(id_prof period)
drop tmp*

* Holders for the four-class grouping filled in by the loop that follows.
generate temp1e = .
generate temp1b = .

* instead of using quartiles, we use four groups: below the median / 6th and 7th deciles / 8th and 9th deciles / top decile

* Collect the distinct sectors, periods and ranks to iterate over.
levelsof ssdcode, local(sectors)
levelsof period,  local(periods)
levelsof fascia,  local(ranks)

generate decile1e = .
generate decile1b = .

* For every sector/period/rank cell and for both measures (e, b): compute the
* within-cell decile, store it, and collapse it into four classes
* (deciles 1-5 -> 1, 6-7 -> 2, 8-9 -> 3, 10 -> 4).
* NOTE(review): xtile() is not an official egen function; presumably it comes
* from the egenmore package - confirm it is installed.
foreach s of local sectors {
    foreach p of local periods {
        foreach r of local ranks {
            local cell "ssdcode==`s' & period==`p' & fascia==`r'"
            foreach m in e b {
                egen tmp = xtile(fsoutput1`m') if `cell', nq(10)
                * tmp is missing outside the current cell, so only this cell is filled
                replace decile1`m' = tmp if decile1`m'==.
                replace temp1`m' = tmp if `cell'
                recode temp1`m' 1 2 3 4 5 = 1 6 7 = 2 8 9 = 3 10 = 4 if `cell'
                drop tmp
            }
            dis "sector" `s' "period" `p' "fascia" `r'
        }
    }
}
***** quartiles are defined by the position in the initial year of the period; for professors who enter later,
***** the position in their first observed year is used, and it is retained even if it changes within the period
***** the number of changers within the period can be seen in the cross-tabulations below

tabulate temp1e temp1b, missing

* Take each professor's class in the first year observed within the period
* and spread it over all of that professor's rows in the period.
sort id_prof anno
tempvar first_year start_e start_b
egen `first_year' = min(anno), by(id_prof period)
generate `start_e' = temp1e if anno == `first_year'
generate `start_b' = temp1b if anno == `first_year'
egen quartile1e = max(`start_e'), by(id_prof period)
egen quartile1b = max(`start_b'), by(id_prof period)

* Year-by-year class against the retained initial class.
tabulate temp1e quartile1e, missing
tabulate temp1b quartile1b, missing
drop `first_year' `start_e' `start_b' temp1e temp1b

* Remove the placeholder rows added by fillin and re-declare the panel.
drop if _fillin==1
drop _fillin
xtset id_prof anno

********************************compute the relative proportions of available seats - by year*************

* gR: per year and ssd, the sum of ricass divided by the number of records
* with fascia==1 or ricass==1. The helper variable tn1 stays in the dataset.
local pool1 "fascia==1 | ricass==1"
egen tk1 = total(ricass) if `pool1', by(anno ssd)
egen k1 = max(tk1), by(anno ssd)
drop tk1
egen tn1 = count(id_prof) if `pool1', by(anno ssd)
egen n1 = max(tn1), by(anno ssd)
generate gR = k1/n1

label variable gR "ex-ante yearly probability of winning associate professorship"

* gA: per year and ssd, the sum of ricord (all records) plus the sum of assord
* (records with fascia==2 or assord==1), divided by the number of records with
* fascia==2 or assord==1. tk2a, tk2b and tn2 stay in the dataset.
local pool2 "fascia==2 | assord==1"
egen tk2a = total(ricord), by(anno ssd)
egen tk2b = total(assord) if `pool2', by(anno ssd)
generate tk2 = tk2a + tk2b
egen k2 = max(tk2), by(anno ssd)
drop tk2
egen tn2 = count(id_prof) if `pool2', by(anno ssd)
egen n2 = max(tn2), by(anno ssd)
generate gA = k2/n2
label variable gA "ex-ante yearly probability of winning full professorship"

***********************************************************************************************************
* Female share and mean age within each period/ssd/fascia cell.
egen shfemale = mean(female), by(period ssd fascia)
egen avage = mean(age), by(period ssd fascia)
label variable avage "average age by ssd/fascia/period"
label variable shfemale "female share by ssd/fascia/period"

* The same two statistics computed on one fascia at a time and then spread to
* every row of the period/ssd cell, whatever the row's own fascia.
forvalues k = 1/3 {
    egen tmp = mean(female) if fascia==`k', by(period ssd)
    egen sh`k'female = max(tmp), by(period ssd)
    drop tmp
    egen tmp = mean(age) if fascia==`k', by(period ssd)
    egen av`k'age = max(tmp), by(period ssd)
    drop tmp
}

* Standard deviation of age, fascia 3 only.
egen tmp = sd(age) if fascia==3, by(period ssd)
egen sd3age = max(tmp), by(period ssd)
drop tmp

* Label suffix for each fascia value.
local rank1 "assistant professor"
local rank2 "associate professor"
local rank3 "full professor"
forvalues k = 1/3 {
    label variable av`k'age "average age by ssd/period - `rank`k''"
    label variable sh`k'female "female share by ssd/period - `rank`k''"
}
label variable sd3age "sd.dev age by ssd/period - full professor"

compress
save datacdv2, replace


